import pandas as pd  
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder  
from sklearn.cluster import KMeans  
import pickle
  
# Pipeline overview: cluster customers with K-means on a handful of encoded,
# standardized features, then report which clusters (other than the already
# targeted ones) contain customers with a good credit history and a high
# credit amount. The resulting list of cluster ids is printed and pickled.

# Load the dataset
credit_customers = pd.read_csv("credit_customers.csv")

# Extract the columns used as clustering features. Work on a copy so the
# encoding steps below never touch credit_customers itself.
important_columns = ['credit_history', 'age', 'employment', 'credit_amount', 'savings_status']
data_for_clustering = credit_customers[important_columns].copy()

# Label-encode 'savings_status' and 'employment' (each category becomes an
# integer code; a fresh encoder is fitted per column).
data_for_clustering['savings_status'] = LabelEncoder().fit_transform(data_for_clustering['savings_status'])
data_for_clustering['employment'] = LabelEncoder().fit_transform(data_for_clustering['employment'])

# One-hot encode 'credit_history'; drop_first=True omits the first category's
# indicator column so the remaining indicators are not redundant.
data_for_clustering = pd.get_dummies(data_for_clustering, columns=['credit_history'], drop_first=True)

# Standardize every feature so no single column dominates the distance
# computation used by K-means.
data_for_clustering_scaled = StandardScaler().fit_transform(data_for_clustering)

# Perform K-means clustering with 4 clusters (fixed seed for repeatability)
kmeans = KMeans(n_clusters=4, random_state=42)
cluster_labels = kmeans.fit_predict(data_for_clustering_scaled)

# Attach the cluster label of each row to the original dataset
credit_customers['cluster'] = cluster_labels

# Identify additional customer segments with potential for responsiveness to
# promotions and financing options.
# Criteria: a good credit history (here: exactly 'no credits/all paid') and a
# credit amount strictly above the 75th percentile of all credit amounts.
good_credit_history = credit_customers['credit_history'].isin(['no credits/all paid'])
high_credit_amount = credit_customers['credit_amount'] > credit_customers['credit_amount'].quantile(0.75)

# Keep only the rows that satisfy both criteria
potential_customers = credit_customers[good_credit_history & high_credit_amount]

# Distinct cluster ids in which those customers fall
additional_customer_segments = potential_customers['cluster'].unique().tolist()

# Exclude the target customer segments identified in the previous step
target_customer_segments = [1, 2]  # Replace this list with the target customer segments from the previous step
additional_customer_segments = [segment for segment in additional_customer_segments if segment not in target_customer_segments]

# Report the list of additional customer segments
print("additional_customer_segments:\n", additional_customer_segments)

# Persist the result. The context manager guarantees the file is flushed and
# closed even if pickling raises; the previous bare open() left the handle
# open until garbage collection.
with open("./ref_result/additional_customer_segments.pkl", "wb") as output_file:
    pickle.dump(additional_customer_segments, output_file)
